# Root folder of the replication material. Every input file and every saved
# figure below is located by pasting a relative path onto this string with
# paste0(), so it has to end with a trailing slash. Adjust it to your local
# copy before running the script.
path <- "/Users/eborbath/Library/CloudStorage/Dropbox-WZB/Endre Borbath/Papers/Unions/Replication_material/"

library(dplyr)
library(lubridate)
library(countrycode)
library(tidyr)
library(ggplot2)
library(readxl)
library(ggmosaic)
library(cowplot)
# Strongly prefer fixed over scientific notation when numbers are printed.
options(scipen = 999)

# File extension appended to the name of every figure saved by ggsave() below.
filetype <- "png"

## Figure 1 - public, private, both and unknown union protests per region (average country, average year)

# Protest event data. `stringsAsFactors = FALSE` keeps text columns as
# character on R < 4.0 too, which the string comparisons below rely on.
pea_dat <- read.csv(
  paste0(path, "PEA_2000_2021/PEA_integrated_2000_2021_V2_cleaner_longcov.csv"),
  stringsAsFactors = FALSE
)

# Event-level data for Figure 1:
#   * Malta, Iceland and Luxembourg are dropped;
#   * `date` is parsed into a proper Date and `year` is re-derived from it;
#   * Northern Ireland is folded into the United Kingdom;
#   * countries are assigned to one of five `region_Visser` groups;
#   * the actor_union_* columns are turned from "yes"/other into 1/0 flags.
dat <- pea_dat %>%
  filter(!(country_name %in% c("malta", "iceland", "luxembourg"))) %>%
  mutate(
    # Rows with year == 2021 are parsed as day-month-year, all other rows as
    # year-month-day; each parser only sees the strings meant for it.
    date_dmy = dmy(ifelse(year == 2021, date, NA_character_)),
    date_ymd = ymd(ifelse(year == 2021, NA_character_, date)),
    # if_else() keeps the Date class. The previous ifelse() returned bare day
    # counts, and the as.Date(<numeric>) needed to undo that fails without an
    # explicit `origin` on R < 4.3.
    date = if_else(year == 2021, date_dmy, date_ymd)
  ) %>%
  mutate(country_name = ifelse(country_name == "northern ireland", "united kingdom", country_name)) %>%
  mutate(country_iso = countryname(country_name, "iso2c")) %>%
  mutate(region_Visser = case_when(
    country_iso %in% c("SE", "DK", "NO", "FI", "IS") ~ "Corporatism (North)",
    country_iso %in% c("DE", "SI", "BE", "AT", "CH", "NL", "LU") ~ "Social-Partnership (West)",
    country_iso %in% c("GR", "FR", "IT", "ES", "PT", "MT") ~ "State-centered (South)",
    country_iso %in% c("CY", "IE", "GB", "UK") ~ "Liberal",
    country_iso %in% c("HU", "CZ", "EE", "BG", "LT", "SK", "RO", "LV", "PL") ~ "Transitional (East)",
    TRUE ~ NA_character_
  )) %>%
  mutate(region_Visser = factor(
    region_Visser,
    levels = c(
      "Corporatism (North)", "Social-Partnership (West)",
      "State-centered (South)", "Liberal", "Transitional (East)"
    )
  )) %>%
  # From here on `year` is the calendar year of the parsed date.
  mutate(year = year(date)) %>%
  # "yes" becomes 1, any other non-missing value 0 (missing stays missing).
  mutate_at(vars(starts_with("actor_union")), ~ ifelse(. == "yes", 1, 0)) %>%
  # "unknown" only counts when none of the specific union types is flagged.
  mutate(actor_union_unknown = ifelse(
    actor_union_both + actor_union_public + actor_union_private > 0,
    0,
    actor_union_unknown
  )) %>%
  # Also drops the two helper date columns created above.
  select(starts_with("actor_union"), weighted_event, weighted_part_all, country_iso, date, year, region_Visser)
  

# Step 1: weighted number of events per region / country / year, separately
# for each actor_union_* flag.
events <- dat %>%
  mutate_at(vars(starts_with("actor_union")), ~ .x * weighted_event) %>%
  select(region_Visser, country_iso, year, starts_with("actor_union")) %>%
  group_by(region_Visser, country_iso, year) %>%
  summarise_all(sum)

# Every country crossed with every year that occurs in the data, so that
# country-years without any event enter the averages as zeros.
full_grid <- expand.grid(
  country_iso = unique(dat$country_iso),
  year = unique(dat$year)
)

# Step 2: complete the panel and average over all country-years of a region.
events <- merge(full_grid, events, all.x = TRUE) %>%
  group_by(country_iso) %>%
  # Rows added by the grid have no region yet; copy it from the same country.
  fill(region_Visser, .direction = "updown") %>%
  ungroup() %>%
  mutate_at(vars(starts_with("actor_union")), ~ replace(.x, is.na(.x), 0)) %>%
  select(-c(country_iso, year)) %>% # adjust for over time
  group_by(region_Visser) %>%
  summarise_all(mean)

# Step 3: long format with readable union labels, rescaled to shares that sum
# to one within each region.
events <- events %>%
  pivot_longer(
    cols = starts_with("actor_union"),
    names_to = "union",
    values_to = "events"
  ) %>%
  mutate(union = factor(
    union,
    levels = c(
      "actor_union_private", "actor_union_public",
      "actor_union_both", "actor_union_unknown"
    ),
    labels = c("Private", "Public", "Both", "Unknown")
  )) %>%
  group_by(region_Visser) %>% # adjust for over time
  mutate(events = events / sum(events)) %>%
  ungroup() %>%
  # A region without any union event yields 0 / 0; report that as 0.
  mutate(events = replace(events, is.na(events), 0))
  
# One panel per region, one bar per union type. The plot is not stored:
# ggsave() below picks up the most recent plot.
ggplot(events, aes(x = union, y = events)) +
  geom_col() +
  labs(y = "Share of protests (within union protests in each region)") +
  facet_wrap(~region_Visser, scales = "free_y", ncol = 3) +
  theme_bw() +
  theme(axis.title.x = element_blank())

ggsave(
  paste0(path, "Appendix/public_private_2000-2021", ".", filetype),
  width = 5, height = 3, scale = 1.4, dpi = 300
)

## Figure 2 - seven most common unions per region and associated share of protests within region

# Read the hand-coded union names and reshape them to one row per event and
# named union, labelled with the country code and the region.
dat <- read_excel(paste0(path, "PEA_to_code_RL.xlsx")) %>%
  select(country_name, year, contains("Name of the union"), id_event) %>%
  pivot_longer(
    cols = contains("Name of the union"),
    names_to = "union",
    values_to = "name"
  ) %>%
  select(-union) %>%
  filter(!is.na(name)) %>%
  mutate(
    country_name = ifelse(country_name == "northern ireland", "united kingdom", country_name),
    country_iso = countryname(country_name, "iso2c"),
    # Append the country code to the union name.
    name = paste0(name, " (", country_iso, ")"),
    region_Visser = case_when(
      country_iso %in% c("SE", "DK", "NO", "FI", "IS") ~ "Corporatism (North)",
      country_iso %in% c("DE", "SI", "BE", "AT", "CH", "NL", "LU") ~ "Social-Partnership (West)",
      country_iso %in% c("GR", "FR", "IT", "ES", "PT", "MT") ~ "State-centered (South)",
      country_iso %in% c("CY", "IE", "GB", "UK") ~ "Liberal",
      country_iso %in% c("HU", "CZ", "EE", "BG", "LT", "SK", "RO", "LV", "PL") ~ "Transitional (East)",
      TRUE ~ NA_character_
    ),
    region_Visser = factor(
      region_Visser,
      levels = c(
        "Corporatism (North)", "Social-Partnership (West)",
        "State-centered (South)", "Liberal", "Transitional (East)"
      )
    )
  ) %>%
  select(-c(country_iso, year))

# Event weights from the protest event data, matched through id_event.
weights <- select(pea_dat, id_event, weighted_event)

# Weighted events per union, expressed as a share of all union events in the
# same region; only the seven largest unions per region are kept.
dat <- merge(dat, weights, all.x = TRUE) %>%
  group_by(region_Visser, name) %>%
  mutate(events = sum(weighted_event, na.rm = TRUE)) %>%
  # One row per region and union (first occurrence is kept).
  distinct(region_Visser, name, events) %>%
  group_by(region_Visser) %>%
  mutate(events = events / sum(events, na.rm = TRUE)) %>%
  ungroup() %>%
  arrange(region_Visser, desc(events)) %>%
  group_by(region_Visser) %>%
  # Rows are sorted by share within region, so the position is the rank.
  mutate(rank = seq_len(n())) %>%
  filter(rank <= 7)
  
# Horizontal bars, unions ordered by their share, one panel per region. The
# plot is not stored: ggsave() below picks up the most recent plot.
ggplot(dat, aes(x = events, y = reorder(name, events))) +
  geom_col() +
  labs(
    x = "Share of protests (within union protests in each region)",
    y = "Union"
  ) +
  facet_wrap(~region_Visser, scales = "free_y", ncol = 3) +
  theme_bw() +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1),
    axis.title.y = element_blank()
  )

ggsave(
  paste0(path, "Appendix/7top_names_2016-2021", ".", filetype),
  width = 5, height = 3, scale = 2.2, dpi = 300
)

## Figure 3 - sectoral share in union sponsored protest events (only unions with at least 5 events, two-thirds of the sample)

# Hand-coded union names again, one row per event and named union, this time
# keeping the raw name (no country suffix) so it can be matched to the
# union-level file read further down.
dat <- read_excel(paste0(path, "/PEA_to_code_RL.xlsx")) %>%
  select(country_name, year, contains("Name of the union"), id_event) %>%
  pivot_longer(
    cols = contains("Name of the union"),
    names_to = "union",
    values_to = "name"
  ) %>%
  select(-union) %>%
  filter(!is.na(name)) %>%
  mutate(
    country_name = ifelse(country_name == "northern ireland", "united kingdom", country_name),
    country_iso = countryname(country_name, "iso2c")
  )

# Event and participant weights, matched through id_event.
weights <- select(pea_dat, id_event, weighted_event, weighted_part_all)

# Weighted events and weighted participants per country and union.
dat <- merge(dat, weights, all.x = TRUE) %>%
  group_by(country_iso, name) %>%
  mutate(
    events = sum(weighted_event, na.rm = TRUE),
    participants = sum(weighted_part_all, na.rm = TRUE)
  ) %>%
  distinct(country_iso, name, events, participants)

# Union-level file; its union_name column is the merge key.
dat_names <- read_excel(paste0(path, "/wikipedia_union_names_RL.xlsx")) %>%
  rename(name = union_name)

# Keep every union listed in the union-level file, with protest totals
# attached where the name matches.
dat <- merge(dat_names, dat, by = "name", all.x = TRUE)

# Share of each sector in all union events (and participants). Both metrics
# are computed, but only the event share is kept for the figure.
sectors <- dat %>%
  mutate(sector = case_when(
    sector == "Confederation/public sector" ~ "Confederation",
    is.na(sector) ~ "Other",
    TRUE ~ sector
  )) %>%
  group_by(sector) %>%
  summarise(
    events = sum(events, na.rm = TRUE),
    participants = sum(participants, na.rm = TRUE)
  ) %>%
  ungroup() %>%
  # Turn the sector totals into shares of the overall total.
  mutate(
    events = events / sum(events),
    participants = participants / sum(participants)
  ) %>%
  pivot_longer(
    cols = c(events, participants),
    names_to = "metric",
    values_to = "value"
  ) %>%
  mutate(metric = factor(metric, levels = c("participants", "events"))) %>%
  filter(metric == "events")

# Horizontal bars, sectors ordered by their share of events. The plot is not
# stored: ggsave() below picks up the most recent plot.
ggplot(sectors, aes(x = value, y = reorder(sector, value))) +
  geom_col() +
  labs(
    x = "Sectoral share in union sponsored protest events",
    y = "Union"
  ) +
  theme_bw() +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1),
    axis.title.y = element_blank(),
    legend.position = "bottom",
    legend.title = element_blank()
  )

ggsave(
  paste0(path, "Appendix/sectors_2016-2021", ".", filetype),
  width = 5, height = 3, scale = 1.3, dpi = 300
)

## Figure 5 - membership and protest (only unions with at least 5 events)

# Most recent available TUM value per country, restricted to the countries
# that occur in `dat`. The code -88 is treated as missing.
all_members <- read.csv(paste0(path, "Visser dataset/OECD-AIAS-ICTWSS-CSV_v1.1.csv")) %>%
  select(country, TUM, year) %>%
  arrange(country, desc(year)) %>%
  mutate(TUM = ifelse(TUM == -88, NA, TUM)) %>%
  filter(!is.na(TUM)) %>%
  group_by(country) %>%
  # Years are sorted newest first, so the first row is the latest value.
  filter(row_number() == 1) %>%
  ungroup() %>%
  mutate(country_iso = countryname(country, "iso2c")) %>%
  filter(country_iso %in% dat$country_iso)

# Show which year was used for each country.
all_members

all_members <- select(all_members, -c(country, year))

# Attach the country-level TUM value to every union row.
dat <- merge(dat, all_members, by = "country_iso", all.x = TRUE)

# Union-level data for the membership figure. `Members` uses "?" for unknown
# values; these become NA before the column is converted to numeric. Dividing
# by 1000 puts `Members` on the scale of `TUM` (presumably thousands of
# members - TODO confirm against the codebook), so `Members_share` is the
# union's share in its country's total membership.
plot_dat <- dat %>%
  select(country_iso, TUM, Members, events, participants) %>%
  mutate(Members = ifelse(Members == "?", NA, Members)) %>%
  mutate(Members = as.numeric(Members) / 1000) %>%
  mutate(Members_share = Members / TUM)

# Left panel: membership share against the number of protest events.
plot_events <- ggplot(plot_dat, aes(x = Members_share, y = events)) +
  geom_point(color = "gray50") +
  geom_smooth(method = "lm", se = FALSE, color = "black", linetype = "dashed") +
  xlab("Share of union membership") +
  ylab("Number of protest events") +
  theme_bw()

# Right panel: membership share against the number of participants.
plot_participants <- ggplot(plot_dat, aes(x = Members_share, y = participants)) +
  geom_point(color = "gray50") +
  geom_smooth(method = "lm", se = FALSE, color = "black", linetype = "dashed") +
  xlab("Share of union membership") +
  ylab("Number of participants in protest events") +
  theme_bw()

# Store the combined figure and hand it to ggsave() explicitly instead of
# relying on ggplot2's implicit "last plot", so the saved file is the
# two-panel grid regardless of what was drawn last.
membership_plot <- plot_grid(plot_events, plot_participants)
membership_plot

ggsave(
  paste0(path, "Appendix/membership_2016-2021", ".", filetype),
  plot = membership_plot,
  width = 5,
  height = 3,
  scale = 1.3,
  dpi = 300
)